# NOTE(review): This entire file is commented out — it is a disabled/archived
# Scrapy spider. Nothing below executes. If it is ever re-enabled, review the
# following issues spotted in the dead code first:
#   - `datetime.now()` is called in FullSpider.__init__ but `datetime` is not
#     in the visible import list; presumably it comes from the
#     `from suzhou.rules import *` wildcard — TODO confirm before re-enabling.
#   - `BaseSpider` and the module-level `parse_item_data` helper (called from
#     the method of the same name) are likewise assumed to come from that
#     wildcard import — verify.
#   - In parse_item_data, `meta.get('url')` would always be None because
#     start_requests never stores 'url' in meta; `response.url` is probably
#     what was intended.
#   - The done_filter check runs twice: once when building dist_list and again
#     inside the loop (only the in-loop check honors FORCE_RECOVER).
# # -*- coding: utf-8 -*-
# import random
# import scrapy
# from scrapy import Request
# from scrapy.utils.project import get_project_settings
# from scrapy.exceptions import IgnoreRequest
# from zc_core.util.batch_gen import time_to_batch_no
# from zc_core.util.http_util import retry_request
# from zc_core.dao.sku_pool_dao import SkuPoolDao
# from zc_core.dao.batch_dao import BatchDao
# from zc_core.util.done_filter import DoneFilter
# from suzhou.rules import *
# from suzhou.utils.cookie_builder import build_cookie
#
#
# class FullSpider(BaseSpider):
#     name = 'full_bar'
#     # Commonly used URL template (spu_id, sku_id are interpolated below)
#     item_url = 'http://www.zfcgwssc.suzhou.gov.cn/commodities/{}?p_id={}'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(FullSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         # Create the batch record
#         BatchDao().create_batch(self.batch_no)
#         # Filter used to avoid re-collecting items already done in this batch
#         self.done_filter = DoneFilter(self.batch_no)
#
#     def start_requests(self):
#         settings = get_project_settings()
#         pool_list = SkuPoolDao().get_sku_pool_list()
#         self.logger.info('全量: %s' % (len(pool_list)))
#         dist_list = [x for x in pool_list if not self.done_filter.contains(x.get('_id'))]
#         self.logger.info('目标：%s' % (len(dist_list)))
#         random.shuffle(dist_list)
#         for sku in dist_list:
#             sku_id = sku.get('_id')
#             spu_id = sku.get('spuId')
#             cat3_id = sku.get('catalog3Id')
#             # Skip SKUs already collected (unless FORCE_RECOVER is set).
#             # NOTE(review): redundant with the dist_list filter above, except
#             # that only this check honors FORCE_RECOVER.
#             if self.done_filter.contains(sku_id) and not settings.get('FORCE_RECOVER', False):
#                 self.logger.info('已采: %s', sku_id)
#                 continue
#             # Collect the item / relation data for this SKU
#             yield Request(
#                 url=self.item_url.format(spu_id, sku_id),
#                 meta={
#                     'reqType': 'item',
#                     'batchNo': self.batch_no,
#                     'skuId': sku_id,
#                     'spuId': spu_id,
#                     'catalog3Id': cat3_id,
#                 },
#                 headers={
#                     'Host': 'www.zfcgwssc.suzhou.gov.cn',
#                     'Connection': 'keep-alive',
#                     'Cache-Control': 'max-age=0',
#                     'Upgrade-Insecure-Requests': '1',
#                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400',
#                     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
#                     'Accept-Encoding': 'gzip, deflate',
#                     'Accept-Language': 'zh-CN,zh;q=0.9',
#                     # 'Cookie': f'area_id=320505; _suzhou_session={build_cookie()}',
#                 },
#                 callback=self.parse_item_data,
#                 errback=self.error_back,
#             )
#
#     # Handle the item detail response; delegates parsing to the module-level
#     # parse_item_data helper (presumably from suzhou.rules — confirm).
#     def parse_item_data(self, response):
#         meta = response.meta
#         data = parse_item_data(response)
#         if data:
#             self.logger.info('商品: [%s]' % data.get('skuId'))
#             yield data
#         else:
#             # NOTE(review): meta never contains 'url' (see start_requests),
#             # so this logs None; response.url was probably intended.
#             self.logger.info('下架: %s' % meta.get('url'))
#
#     # Error handling: ignore IgnoreRequest, retry other request failures.
#     def error_back(self, e):
#         if e.type and e.type == IgnoreRequest:
#             self.logger.info(e.value)
#         else:
#             if e.request:
#                 self.logger.error('请求异常: [%s][%s] -> [%s]' % (str(type(e)), e.request.url, e.request.meta))
#                 yield retry_request(e.request)
#             else:
#                 self.logger.error('未知异常: %s' % e)
